#!/usr/bin/python3

# SPDX-FileCopyrightText: Red Hat, Inc.
# SPDX-License-Identifier: GPL-2.0-or-later

"""
Parse repoStats log lines and plot graphs of lastCheck and read delay for all
storage domains.

Usage: repoplot vdsm.log [...]

Requirements: python-pandas
"""

from __future__ import absolute_import

import argparse
import fileinput
import sys

from collections import defaultdict
from collections import namedtuple

# Use non-interactive backend so we can generate graphs on a headless server.
# See http://matplotlib.org/faq/howto_faq.html#howto-webapp
import matplotlib
matplotlib.use("Agg")

import pandas
from matplotlib import pyplot


def main(args):
    args = parse_args(args)
    stats = parse(args.files)
    filename = "%s.%s" % ((args.name or args.files[0]), args.format)
    plot(stats, filename, (args.width, args.height))


def parse_args(args):
    parser = argparse.ArgumentParser(
        description="Visualize storage monitoring")
    parser.add_argument("--name", "-n", dest="name",
                        help="output file name (default first log file name)")
    parser.add_argument("--format", "-f", dest="format",
                        help="output file format (default pdf)")
    parser.add_argument("--width", "-x", dest="width", type=int,
                        help="output file width in inches (default 20)")
    parser.add_argument("--height", "-y", dest="height", type=int,
                        help="output file height in inches (default 30)")
    parser.add_argument("files", nargs="+",
                        help="vdsm log files to process")
    parser.set_defaults(name=None, format="pdf", width=20, height=30)
    return parser.parse_args(args)


Log = namedtuple("Log", (
    "thread",
    "loglevel",
    "timestamp",
    "module",
    "lineno",
    "logger",
    "func",
    "text"
))


class Stats(object):
    def __init__(self):
        self.repostats = defaultdict(DomainStats)
        self.lvm_commands = defaultdict(CommandStats)
        self.mailbox_commands = defaultdict(CommandStats)
        self.monitor_commands = defaultdict(CommandStats)
        self.disk_commands = defaultdict(CommandStats)


class DomainStats(object):

    def __init__(self):
        self.timestamp = []
        self.lastcheck = []
        self.delay = []

    def add(self, timestamp, lastcheck, delay):
        """
        Called when repoStats event was detected for this domain.
        """
        self.timestamp.append(timestamp)
        self.lastcheck.append(lastcheck)
        self.delay.append(delay)


class CommandStats(object):

    def __init__(self):
        self.timestamp = []
        self.runtime = []
        self.running = False

    def start(self, timestamp):
        """
        Called when a commnad start log was detected for this thread.
        """
        self.running = True
        self.timestamp.append(timestamp)
        self.runtime.append(0)

    def end(self, timestamp):
        """
        Called when a commnad end log was detected for this thread.
        """
        if not self.running:
            return
        self.running = False
        timedelta = timestamp - self.timestamp[-1]
        self.timestamp.append(timestamp)
        self.runtime.append(timedelta.total_seconds())
        # Add zero in the same timestamp, to get nicer triangles in the plots.
        self.timestamp.append(timestamp)
        self.runtime.append(0)


def parse(files):
    """
    Parse patterns from vdsm log and return Stats object.
    """
    patterns = [
        # Match repoStats response log:
        # jsonrpc.Executor/5::INFO::2016-02-17
        # 19:26:33,837::logUtils::51::dispatcher::(wrapper) Run and protect:
        # repoStats, Return response: ...
        (add_repostats, "Run and protect: repoStats, Return response:",
            "repostats"),

        # Match storage domains getReadDelay commands.
        #
        # Begin:
        # Thread-16::DEBUG::2016-02-17
        # 20:14:37,049::blockSD::436::Storage.Misc.excCmd::(getReadDelay)
        # /usr/bin/taskset --cpu-list 0-7 /usr/bin/dd ...
        #
        # End:
        # Thread-16::DEBUG::2016-02-17
        # 20:14:37,061::blockSD::436::Storage.Misc.excCmd::(getReadDelay)
        # SUCCESS: ...
        (add_command, "::Storage.Misc.excCmd::(getReadDelay)",
            "monitor_commands"),

        # Match LVM commands begin or end logs.
        #
        # Begin:
        # Thread-61::DEBUG::2016-02-17
        # 19:26:33,854::lvm::286::Storage.Misc.excCmd::(cmd) /usr/bin/taskset
        # --cpu-list 0-7 /usr/bin/sudo -n /usr/sbin/lvm vgck ...
        #
        # End:
        # Thread-61::DEBUG::2016-02-17
        # 19:26:37,933::lvm::286::Storage.Misc.excCmd::(cmd) SUCCESS: ...
        (add_command, "::Storage.Misc.excCmd::(cmd)", "lvm_commands"),

        # Match mailbox monitor check for mail dd commands (master domain
        # only).
        #
        # Begin:
        # mailbox.SPMMonitor::DEBUG::2016-02-17
        # 20:15:24,521::storage_mailbox::731::Storage.Misc.excCmd::
        # (_checkForMail)
        # /usr/bin/taskset --cpu-list 0-7 dd ...
        #
        # End:
        # mailbox.SPMMonitor::DEBUG::2016-02-17
        # 20:15:24,547::storage_mailbox::731::Storage.Misc.excCmd::
        # (_checkForMail)
        # SUCCESS: ...
        (add_command, "::Storage.Misc.excCmd::(_checkForMail)",
            "mailbox_commands"),

        # Match qemu-img operation used for copying images in 4.0.
        #
        # Begin:
        # 10db509a-d5e4-41e8-8751-1305f0852d68::DEBUG::2016-02-17
        # 19:32:49,865::image::137::Storage.Image::
        # (_wait_for_qemuimg_operation)
        # waiting for qemu-img operation to complete
        #
        # End:
        # 10db509a-d5e4-41e8-8751-1305f0852d68::DEBUG::2016-02-17
        # 19:40:14,134::image::146::Storage.Image::
        # (_wait_for_qemuimg_operation)
        # qemu-img operation has completed
        (add_qemuimg_operation,
            "::Storage.Image::(_wait_for_qemuimg_operation)",
            "disk_commands"),

        # Match qemu-img convert used for copying images in 3.6. This matches
        # the start of the command.
        #
        # d9633df0-5f67-442a-861b-91d417b046f5::DEBUG::2016-02-09
        # 12:37:59,885::utils::669::root::(execCmd) /usr/bin/taskset --cpu-list
        # 0-23 /usr/bin/nice -n 19 /usr/bin/ionice -c 3 /usr/bin/qemu-img
        # convert ...
        (add_qemuimg_copy, "::root::(execCmd)", "disk_commands"),

        # Match qemu-img convert used for copying images in 3.6. This matches
        # the end of the command.
        #
        # d9633df0-5f67-442a-861b-91d417b046f5::DEBUG::2016-02-09
        # 12:39:51,177::utils::716::root::(watchCmd) SUCCESS: ...
        (add_qemuimg_copy, "::root::(watchCmd)", "disk_commands"),

        # Match qemu-img convert used for copying disks on 3.5.
        #
        # Begin:
        # 8c6193e7-cec6-44d1-afa2-2dd3437d40c8::DEBUG::2014-03-25
        # 00:37:46,374::utils::546::Storage.Misc.excCmd::(watchCmd) '/bin/nice
        # -n 19 /usr/bin/ionice -c 3 /usr/bin/qemu-img convert ...
        #
        # End:
        # ffe3217e-6a91-4499-910b-cab975c5a3e4::DEBUG::2016-02-20
        # 21:10:00,808::utils::718::root::(watchCmd) SUCCESS: ...
        (add_qemuimg_copy, "::Storage.Misc.excCmd::(watchCmd)",
            "disk_commands"),

        # Match dd copy commands used for copying disks on 3.5.
        #
        # Begin:
        # d5e74756-f1df-407d-9d83-0b4b4c844b15::DEBUG::2016-02-27
        # 20:51:37,210::utils::796::Storage.Misc.excCmd::(watchCmd)
        # /usr/bin/nice -n 19 /usr/bin/ionice -c 3 /usr/bin/dd ...
        #
        # End:
        # d5e74756-f1df-407d-9d83-0b4b4c844b15::DEBUG::2016-02-27
        # 20:52:00,310::utils::808::Storage.Misc.excCmd::(watchCmd) SUCCESS:
        (add_dd_copy, "::Storage.Misc.excCmd::(watchCmd)", "disk_commands"),
    ]

    stats = Stats()

    for line in fileinput.input(files):
        for func, pattern, name in patterns:
            if pattern in line:
                log = parse_log(line)
                substats = getattr(stats, name)
                func(substats, log)

    return stats


def add_repostats(repostats, log):
    """
    Add repostats samples from repoStats response line
    """
    start = log.text.find("{")
    response = eval(log.text[start:])
    for uuid, info in response.items():
        lastcheck = float(info["lastCheck"])
        delay = float(info["delay"])
        repostats[uuid].add(log.timestamp, lastcheck, delay)


def add_command(commands, log):
    """
    Add commands start and stop events from execCmd calls
    """
    if is_command_end(log):
        commands[log.thread].end(log.timestamp)
    else:
        commands[log.thread].start(log.timestamp)


def add_qemuimg_operation(commands, log):
    """
    Add qemu-img commands start and stop events
    """
    if "waiting for qemu-img operation" in log.text:
        commands[log.thread].start(log.timestamp)
    elif "operation has completed" in log.text:
        commands[log.thread].end(log.timestamp)
    else:
        pass  # Periodic progress reports


def add_qemuimg_copy(commands, log):
    """
    Add qemu-img copy commands start or stop events (pre 4.0).
    """
    if is_command_end(log):
        commands[log.thread].end(log.timestamp)
    elif "/bin/qemu-img convert " in log.text:
        commands[log.thread].start(log.timestamp)


def add_dd_copy(commands, log):
    """
    Add dd copy commands start or stop events (3.5).
    """
    if is_command_end(log):
        commands[log.thread].end(log.timestamp)
    elif "/bin/dd " in log.text:
        commands[log.thread].start(log.timestamp)


def is_command_end(log):
    return log.text.startswith(("SUCCESS:", "ERROR:"))


def parse_log(line):
    # thread::loglevel::timestamp::module::lineno::logger::(func) text
    fields = line.split("::", 6)
    # Split "(func) text"
    func, text = fields[6].split(" ", 1)
    fields[6] = func[1:-1]
    fields.append(text)
    # Parse timestamp
    timestamp = fields[2]
    timestamp = timestamp.replace(",", ".")
    fields[2] = pandas.Timestamp(timestamp)
    return Log(*fields)


def dataframe(stats, key):
    """
    Create pandas.DataFrame with one column per domain for given key.
    """
    dfs = []
    for uuid, ds in stats.iteritems():
        df = pandas.DataFrame(getattr(ds, key), index=ds.timestamp,
                              columns=[uuid])
        dfs.append(df)
    combined = pandas.concat(dfs, axis=1)
    return combined


def plot(stats, filename, size):
    rows = 6
    columns = 1

    pyplot.figure(figsize=size, dpi=300)

    pyplot.subplot(rows, columns, 1)
    pyplot.title("lastCheck")
    pyplot.ylabel("lastCheck (seconds)")
    pyplot.xlabel("time")
    pyplot.grid(True)

    lastcheck = dataframe(stats.repostats, "lastcheck")
    pyplot.plot(lastcheck.index, lastcheck)

    pyplot.axhline(y=30, color="gray", linewidth="2")

    # Show values up to 330 seconds. Bigger values will cause a host to become
    # non-operational.
    pyplot.axis([lastcheck.index[0], lastcheck.index[-1], 0, 330])

    pyplot.subplot(rows, columns, 2)
    pyplot.title("read delay")
    pyplot.ylabel("delay (seconds)")
    pyplot.xlabel("time")
    pyplot.grid(True)

    delay = dataframe(stats.repostats, "delay")
    pyplot.plot(delay.index, delay)

    pyplot.axhline(y=5, color="gray", linewidth="2")

    # Show values up to 10 seconds. Values bigger then 5 seconds will show a
    # warning in engine log.
    pyplot.axis([lastcheck.index[0], lastcheck.index[-1], 0, 10])

    pyplot.subplot(rows, columns, 3)
    pyplot.title("LVM commands")
    pyplot.ylabel("runtime (seconds)")
    pyplot.xlabel("time")
    pyplot.xlim(lastcheck.index[0], lastcheck.index[-1])
    pyplot.grid(True)

    for thread, cs in stats.lvm_commands.iteritems():
        pyplot.plot(cs.timestamp, cs.runtime)

    pyplot.subplot(rows, columns, 4)
    pyplot.title("Mailbox commands")
    pyplot.ylabel("runtime (seconds)")
    pyplot.xlabel("time")
    pyplot.xlim(lastcheck.index[0], lastcheck.index[-1])
    pyplot.grid(True)

    for thread, cs in stats.mailbox_commands.iteritems():
        pyplot.plot(cs.timestamp, cs.runtime)

    pyplot.subplot(rows, columns, 5)
    pyplot.title("Storage domain monitor commands")
    pyplot.ylabel("runtime (seconds)")
    pyplot.xlabel("time")
    pyplot.xlim(lastcheck.index[0], lastcheck.index[-1])
    pyplot.grid(True)

    for thread, cs in stats.monitor_commands.iteritems():
        pyplot.plot(cs.timestamp, cs.runtime)

    pyplot.subplot(rows, columns, 6)
    pyplot.title("disk commands")
    pyplot.ylabel("runtime (seconds)")
    pyplot.xlabel("time")
    pyplot.xlim(lastcheck.index[0], lastcheck.index[-1])
    pyplot.grid(True)

    for thread, cs in stats.disk_commands.iteritems():
        pyplot.plot(cs.timestamp, cs.runtime)

    pyplot.savefig(filename, bbox_inches="tight")


if __name__ == "__main__":
    main(sys.argv[1:])
